home *** CD-ROM | disk | FTP | other *** search
- ;---------------------------Module-Header-------------------------------;
- ; Module Name: SQRT.ASM
- ;
- ; Contains FIXED and ULONG square root
- ;
- ; Created: Sun 30-Aug-1987 19:28:30
- ;
- ; NOTE: the 64-bit sqrt code is 386 specific!
- ;
- ;-----------------------------------------------------------------------;
- ;
- ; (C) Copyright Microsoft Corp. 1987-1991. All rights reserved.
- ;
- ; You have a royalty-free right to use, modify, reproduce and
- ; distribute the Sample Files (and/or any modified version) in
- ; any way you find useful, provided that you agree that
- ; Microsoft has no warranty obligations or liability for any
- ; Sample Application Files which are modified.
- ;
-
- ?WIN = 0
- ?PLM = 0
- ?NODATA = 1
-
- .xlist
- include cmacros.inc
- .list
-
- UQUAD struc
- uq0 dw ?
- uq1 dw ?
- uq2 dw ?
- uq3 dw ?
- UQUAD ends
-
- PTL struc
- ptl_x dd ?
- ptl_y dd ?
- PTL ends
-
- POINTFX struc
- x dd ?
- y dd ?
- z dd ?
- w dd ?
- POINTFX ends
-
-
- MAXINT equ 7FFFh
- MININT equ 8000h
-
-
- ; The following two equates are just used as shorthand
- ; for the "word ptr" and "byte ptr" overrides.
-
- wptr equ word ptr
- bptr equ byte ptr
-
- ; The following structure should be used to access high and low
- ; words of a DWORD. This means that "word ptr foo[2]" -> "foo.hi".
-
- LONG struc
- lo dw ?
- hi dw ?
- LONG ends
-
- FARPOINTER struc
- off dw ?
- sel dw ?
- FARPOINTER ends
-
- assert macro a,b,c,d
- endm
-
- EAXtoDXAX macro
- shld edx,eax,16 ; move HIWORD(eax) to dx
- endm
-
- DXAXtoEAX macro
- ror eax,16 ; xchg HIWORD(eax) and LOWORD(eax)
- shrd eax,edx,16 ; move LOWORD(edx) to HIWORD(eax)
- endm
-
- .386
- sBegin Code
- assumes cs,Code
- assumes es,nothing
- assumes ds,nothing
-
- ;---------------------------Private-Macro-------------------------------;
- ; set_ov
- ;
- ; Sets the overflow flag
- ;
- ; Entry:
- ; reg = word register to use for scratch
- ; Returns:
- ; OF set
- ; Error Returns:
- ; none
- ; Registers Destroyed:
- ; reg
- ;-----------------------------------------------------------------------;
-
- set_ov macro reg
- mov reg,8000h
- dec reg
- endm
-
- ;---------------------------Public-Routine------------------------------;
- ; square_root
- ;
- ; Computes the Fixed square root of a Fixed. This algorithm
- ; comes from Knuth D.E; Metafont:The Program (Addison-Weseley, 1986)
- ; Part 8:Algebraic and Transcendental Functions.
- ;
- ; Notation used below:
- ;
- ; Entry:
- ; DX:AX = number to square root
- ; Returns:
- ; DX:AX = square root of input
- ; Error Returns:
- ; OF = 1
- ; Registers Destroyed:
- ; BX,CX
- ; Registers Preserved:
- ; DS,ES,SI,DI
- ;-----------------------------------------------------------------------;
-
- assumes ds,nothing
- assumes es,nothing
-
- cProc fxsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<>
- ParmD fx
- cBegin
- mov ax, fx.lo
- mov dx, fx.hi
-
- cCall square_root
-
- mov dh,dl ; DX:AX *= sqrt(1000h)
- mov dl,ah
- mov ah,al
- xor al,al
- cEnd
-
- cProc ulsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<>
- ParmD ul
- cBegin
- mov ax, ul.lo
- mov dx, ul.hi
-
- cCall square_root
- cEnd
-
- ;
- ; compute the square root of EDX:EAX
- ;
- ; retult returned in EAX and DX:AX
- ;
- ; We dont have a 64-bit square_root function
- ; so approximate it by dividing by 4 until we can use our
- ; 32-bit function
- ;
-
- cProc sqrt64,<PUBLIC,NEAR,NODATA,NONWIN>,<>
- cBegin
- xor cx,cx
-
- test_less_than_32:
- or edx,edx ; is it less than 32-bit?
- jz less_than_32
-
- inc cx
-
- shrd eax,edx,2 ; divide by 4
- shr edx,2
-
- jmp short test_less_than_32
-
- less_than_32:
- ;;
- ;; EDX:EAX is now less than 32 bits, CX contains count of
- ;; divisions by 4 we had to do to get here. call square_root
- ;; and then re-normalize
- ;;
-
- push cx
- EAXtoDXAX
- call square_root
- pop cx
- jcxz sqrt64_exit
-
- @@: shl ax,1
- rcl dx,1
- loop @b
-
- sqrt64_exit:
- DXAXtoEAX
- cEnd
-
- cProc square_root,<PUBLIC,NEAR,NODATA,NONWIN>,<SI,DI>
- ;; ParmD fx
- cBegin
- ;; mov ax, fx.lo
- ;; mov dx, fx.hi
- ; check for < 1 since this algorithm returns 0000:0001
-
- or dx,dx
- jnz non_zero_arg
- cmp ax,1
- ja non_zero_arg
- jmp square_root_exit
-
- negative_arg: ; can't have number negative
- xor ax,ax
- cwd
- set_ov bx
- jmp square_root_exit
-
- non_zero_arg:
- cCall ulNormalize
- ;;;;;;;;jcxz negative_arg
- jz exit_relay
- shr cx,1
- jc add_shifts_only
- shr dx,1
- rcr ax,1
- dec cx
- add_shifts_only:
-
- ;;
- ;; use 23 for a FIXED square_root, and 15 for a ULONG square_root!
- ;;
-
- if 0
- mov ch,23 ; FIXED square_root
- else
- mov ch,15 ; ULONG square_root
- endif
-
- sub ch,cl
- mov cl,8
- sub ch,cl
- jg more_than_8 ; CL = Min(count,8)
- add cl,ch
- xor ch,ch ; CH = Max(count-8,0)
- more_than_8:
-
-
- xchg ax,si ; DI:SI = x
- mov di,dx
-
- xor ax,ax ; AX = y = lower bound = 0
-
- mov bx,2 ; BX = q = estimate of root = 2
-
- add si,si ; intiialize y = top bit of x
- adc di,di
- adc ax,ax
-
- first_8_loop:
- add si,si ; move 2 bits from top
- adc di,di ; of x to bottom of y
- adc ax,ax
- assert NC
-
- add si,si
- adc di,di
- adc ax,ax
- assert NC
-
- sub ax,bx ; AX = y - q
- jle y_neg_or_zero_8
-
- ;!!!CR loops can be cleaned up some
-
- add bx,bx ; BX = 2q
- sub ax,bx ; AX = y - 3q
- jg y_greater_q_8
-
- add ax,bx ; AX = y - q
-
- dec cl
- jnz first_8_loop
-
- jcxz first_8_was_enough
- jmp short done_with_1st_8
-
- y_greater_q_8:
- add bx,2 ; BX = 2q + 2 (or 2q if jg not taken)
-
- dec cl
- jnz first_8_loop
-
- or ch,ch
- jnz done_with_1st_8
-
- first_8_was_enough:
- xchg ax,bx ; DX:AX = q
- xor dx,dx
- shr ax,1 ; root = q >> 1
- or ax,ax ; clear overflow flag
- exit_relay:
- jmp square_root_exit
-
- y_neg_or_zero_8: ; come here if y <= 0
- dec bx
- add bx,bx ; BX = 2q - 2
- add ax,bx ; AX = y + q - 2
-
- dec cl
- jnz first_8_loop
- jcxz first_8_was_enough
-
- ;si is now empty, di contains all signifigant bits
- done_with_1st_8:
-
- mov cl,4
- sub ch,cl
- jg second_4_loop ; CL = Min(count,4)
- add cl,ch
- xor ch,ch ; CH = Max(count-8,0)
-
- second_4_loop:
- ; move 2 bits from top
- add di,di ; of x to bottom of y
- adc ax,ax
- assert NC
-
- add di,di
- adc ax,ax
- assert NC
-
- sub ax,bx ; AX = y - q
- jle y_neg_or_zero_2nd_4
-
- add bx,bx ; BX = 2q
-
- ;!!!CR use compare and jump rather than sub, branch, and restore
- sub ax,bx ; AX = y - 3q
- jg y_greater_q_2nd_4
-
- add ax,bx ; AX = y - q
-
- dec cl
- jnz second_4_loop
-
- jcxz second_4_was_enough
- jmp short done_with_2nd_4
-
- y_greater_q_2nd_4:
- add bx,2 ; BX = 2q + 2 (or 2q if jg not taken)
-
- dec cl
- jnz second_4_loop
-
- or ch,ch
- jnz done_with_2nd_4
-
-
- second_4_was_enough:
- xchg ax,bx ; DX:AX = q
- xor dx,dx
- shr ax,1 ; root = q >> 1
- or ax,ax ; clear overflow flag
- jmp square_root_exit
-
-
- y_neg_or_zero_2nd_4: ; come here if y <= 0
- dec bx
- add bx,bx ; BX = 2q - 2
- add ax,bx ; AX = y + q - 2
-
- dec cl
- jnz second_4_loop
- jcxz second_4_was_enough
-
- done_with_2nd_4:
-
- xor dx,dx ; DX:AX = y SI:BX = q
-
- mov cl,4
- sub ch,cl
- jg third_4_loop ; CL = Min(count,4)
- add cl,ch
- xor ch,ch ; CH = Max(count-13,0)
-
- third_4_loop:
- ;!!!CR Add comment noting that 32 bit math is being used now
- ; move 2 bits from top
- add di,di ; of x to bottom of y
- adc ax,ax
- adc dx,dx
- assert NC
-
- add di,di
- adc ax,ax
- adc dx,dx
- assert NC
-
- sub ax,bx ; DX:AX = y - q
- sbb dx,si
- js y_negative_3rd_4
- jz y_might_be_zero_3rd_4
-
- sorry_y_not_zero_3rd_4:
- add bx,bx
- adc si,si ; SI:BX = 2q
- sub ax,bx ; DX:AX = y - 3q
- sbb dx,si
- jg y_greater_q_3rd_4
- jl y_less_or_equal_q_3rd_4
- or ax,ax
- jnz y_greater_q_3rd_4
-
- y_less_or_equal_q_3rd_4:
- add ax,bx ; DX:AX = y - q
- adc dx,si
-
- dec cl
- jnz third_4_loop
-
- jcxz third_4_was_enough
- jmp short done_with_3rd_4
-
- y_greater_q_3rd_4:
- add bx,2
- adc si,0
-
- dec cl
- jnz third_4_loop
-
- or ch,ch
- jnz done_with_3rd_4
-
- third_4_was_enough:
- xchg ax,bx ; DX:AX = q
- mov dx,si
- shr dx,1 ; root = q >> 1
- rcr ax,1
- or ax,ax ; clear overflow flag
- jmp square_root_exit ;;; short!
-
- y_might_be_zero_3rd_4:
- or ax,ax
- jnz sorry_y_not_zero_3rd_4
-
- y_negative_3rd_4: ; come here if y <= 0
-
- sub bx,1
- sbb si,0
- add bx,bx ; DL:BX = 2q - 2
- adc si,si
-
- add ax,bx
- adc dx,si ; DH:AX = y + q - 2
-
- dec cl
- jnz third_4_loop
- jcxz third_4_was_enough
-
-
- done_with_3rd_4:
-
- xchg ch,cl
-
- last_7_loop:
- add ax,ax
- adc dx,dx
-
- add ax,ax
- adc dx,dx
-
- sub ax,bx ; DX:AX = y - q
- sbb dx,si
- js y_negative_last_7
- jz y_might_be_zero_last_7
-
- ;!!!CR Clean up the exit so that a single exit point is used.
-
- sorry_y_not_zero_last_7:
- add bx,bx
- adc si,si ; SI:BX = 2q
- sub ax,bx ; DX:AX = y - 3q
- sbb dx,si
- jg y_greater_q_last_7
- jl y_less_or_equal_q_last_7
- or ax,ax
- jnz y_greater_q_last_7
-
- y_less_or_equal_q_last_7:
- add ax,bx ; DX:AX = y - q
- adc dx,si
-
- loop last_7_loop
-
- xchg ax,bx ; DX:AX = q
- mov dx,si
- shr dx,1 ; root = q >> 1
- rcr ax,1
- or ax,ax ; clear overflow flag
- jmp short square_root_exit
-
- y_greater_q_last_7:
- add bx,2
- adc si,0
-
- loop last_7_loop
-
- xchg ax,bx ; DX:AX = q
- mov dx,si
- shr dx,1 ; root = q >> 1
- rcr ax,1
- or ax,ax ; clear overflow flag
- jmp short square_root_exit
-
- y_might_be_zero_last_7:
- or ax,ax
- jnz sorry_y_not_zero_last_7
-
- y_negative_last_7: ; come here if y <= 0
-
- sub bx,1
- sbb si,0
- add bx,bx ; DL:BX = 2q - 2
- adc si,si
-
- add ax,bx
- adc dx,si ; DH:AX = y + q - 2
-
- loop last_7_loop
-
- xchg ax,bx ; DX:AX = q
- mov dx,si
- shr dx,1 ; root = q >> 1
- rcr ax,1
-
- or ax,ax ; clear overflow flag
- square_root_exit:
- cEnd
-
- ;---------------------------Public-Routine------------------------------;
- ; ulNormalize
- ;
- ; Normalizes a ULONG so that the highest order bit is 1. Returns the
- ; number of shifts done. Also returns ZF=1 if the ULONG was zero.
- ;
- ; Entry:
- ; DX:AX = ULONG
- ; Returns:
- ; DX:AX = normalized ULONG
- ; CX = shift count
- ; ZF = 1 if the ULONG is zero, 0 otherwise
- ; Registers Destroyed:
- ; none
- ;-----------------------------------------------------------------------;
-
- assumes ds,nothing
- assumes es,nothing
-
- cProc ulNormalize,<PUBLIC,NEAR>
- cBegin
-
- ; shift by words
-
- xor cx,cx
- or dx,dx
- js ulNormalize_exit
- jnz top_word_ok
- xchg ax,dx
- or dx,dx
- jz ulNormalize_exit ; the zero exit
- mov cl,16
- js ulNormalize_exit
- top_word_ok:
-
- ; shift by bytes
-
- or dh,dh
- jnz top_byte_ok
- xchg dh,dl
- xchg dl,ah
- xchg ah,al
- add cl,8
- or dh,dh
- js ulNormalize_exit
- top_byte_ok:
-
- ; do the rest by bits
-
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- js ulNormalize_exit
- inc cx
- add ax,ax
- adc dx,dx
- ulNormalize_exit:
- cEnd
-
- sEnd Code
-
- end
-